FullConnection
传入输入矩阵与权重矩阵,执行全连接计算(矩阵乘法),并可选择性地叠加偏置项与激活函数, 最终输出结果矩阵。
\[ \begin{aligned} dst_{i,j} &= \sum_{k=0}^{K-1} A_{i,k} \cdot B_{k,j} + bias_{i,j} \\ dst_{i,j} &= activation(dst_{i,j}) \end{aligned} \]
其中激活函数支持 ReLU 与 ReLU6。
- 输入:
A - 输入矩阵地址,形状为 M × K。
B - 权重矩阵地址,形状为 K × N。
bias - 偏置矩阵地址,形状为 M × N,可为NULL。
params - 参数打包成数组,结构如下:
M - 输出矩阵行数。
N - 输出矩阵列数。
K - 中间维度大小。
activation_type - 激活函数类型。
A_transpose - 是否转置输入矩阵A(0: 不转置, 1: 转置)。
B_transpose - 是否转置权重矩阵B(0: 不转置, 1: 转置)。
t_A - 转置后输入矩阵A的地址(仅当A_transpose=1时有效)。
t_B - 转置后权重矩阵B的地址(仅当B_transpose=1时有效)。
core_mask - 核掩码,作为独立的函数参数传入,而非 params 数组的成员(仅适用于共享存储版本)。
- 输出:
C - 输出矩阵地址,形状为 M × N。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持fp, int8
MT7004 支持hp, fp
activation_type 支持 ACTIVATION_NONE、ACTIVATION_RELU、ACTIVATION_RELU6
共享存储版本:
- void i8_full_connection_s(int8_t *A, int8_t *B, int8_t *C, int8_t *bias, long long *params, int core_mask)
- void fp_full_connection_s(float *A, float *B, float *C, float *bias, long long *params, int core_mask)
- void hp_full_connection_s(half *A, half *B, half *C, half *bias, long long *params, int core_mask)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <fullconnection.h>
4
5int main(int argc, char* argv[]) {
6 float* A_ref = (float*)0x81000000;
7 float* B_ref = (float*)0x82000000;
8 float* C_ref = (float*)0x83000000;
9 float* bias_ref = (float*)0x84000000;
10
11 float* C_single = (float*)0x85000000;
12
13 int M = 8;
14 int N = 8;
15 int K = 8;
16
17 bool A_transpose = true;
18 bool B_transpose = false;
19 float *t_A = (float*)0x10060000;
20 float *t_B = (float*)0x10070000;
21
22 // Initialize A, B, bias with small values
23 for (i = 0; i < M * K; ++i) {
24 A_ref[i] = (float)(i % 10) * 0.1f;
25 }
26 for (i = 0; i < K * N; ++i) {
27 B_ref[i] = (float)(i % 10) * 0.1f;
28 }
29 for (i = 0; i < M * N; ++i) {
30 C_ref[i] = 0.0f;
31 C_single[i] = 0.0f;
32 bias_ref[i] = (float)(i % 5) * 0.01f;
33 }
34
35 long long params[10];
36
37 params[0] = (long long)M;
38 params[1] = (long long)N;
39 params[2] = (long long)K;
40 params[3] = (long long)ACTIVATION_RELU;
41 params[4] = (long long)A_transpose;
42 params[5] = (long long)B_transpose;
43 params[6] = (long long)t_A;
44 params[7] = (long long)t_B;
45
46 int core_mask = 0xff;
47 fp_full_connection_s(A_ref, B_ref, C_single, bias_ref, params, core_mask);
48 return 0;
49}
私有存储版本:
- void i8_full_connection_p(int8_t *A, int8_t *B, int8_t *C, int8_t *bias, long long *params)
- void fp_full_connection_p(float *A, float *B, float *C, float *bias, long long *params)
- void hp_full_connection_p(half *A, half *B, half *C, half *bias, long long *params)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <fullconnection.h>
4
5int main(int argc, char* argv[]) {
6 float* A_ref = (float*)0x10010000;
7 float* B_ref = (float*)0x10020000;
8 float* C_ref = (float*)0x10030000;
9 float* bias_ref = (float*)0x10040000;
10
11 float* C_single = (float*)0x10050000;
12
13 int M = 8;
14 int N = 8;
15 int K = 8;
16
17 bool A_transpose = true;
18 bool B_transpose = false;
19 float *t_A = (float*)0x10060000;
20 float *t_B = (float*)0x10070000;
21
22 // Initialize A, B, bias with small values
23 for (i = 0; i < M * K; ++i) {
24 A_ref[i] = (float)(i % 10) * 0.1f;
25 }
26 for (i = 0; i < K * N; ++i) {
27 B_ref[i] = (float)(i % 10) * 0.1f;
28 }
29 for (i = 0; i < M * N; ++i) {
30 C_ref[i] = 0.0f;
31 C_single[i] = 0.0f;
32 bias_ref[i] = (float)(i % 5) * 0.01f;
33 }
34
35 long long params[10];
36
37 params[0] = (long long)M;
38 params[1] = (long long)N;
39 params[2] = (long long)K;
40 params[3] = (long long)ACTIVATION_RELU;
41 params[4] = (long long)A_transpose;
42 params[5] = (long long)B_transpose;
43 params[6] = (long long)t_A;
44 params[7] = (long long)t_B;
45
46 fp_full_connection_p(A_ref, B_ref, C_single, bias_ref, params);
47}